*** Start from a clean session: close any open log, then drop data, programs and snapshots
capture log close
clear all
program drop _all
snapshot erase _all

*** Session settings: do not pause output, and fix the random-number seed
set more off
set seed 23423

*************************************************************************************
*************************************************************************************
*************************************************************************************
*** Updating Elephant Chart.do
*** Brina Seidel
*** February 2018
***
*** This program does the following:
*** 	1. Defines programs to 
***			- mark a consistent sample,
***			- produce an anonymous growth incidence curve (elephant chart), 
***			- produce a table showing the country composition of the elephant chart, and
***			- produce a quasi-non-anonymous growth incidence curve.
*** 	2. Produce growth incidence curves using various datasets and time periods, as follows
***			- Original Lakner-Milanovic (LM) data
***			- Original LM data + consistent sample
***			- Original LM countries + consistent sample + 2011 PPP quantile incomes from PovcalNet, if possible, and from LM, if not
*** 		- Original LM countries + consistent sample + 2011 PPP quantile incomes from PovcalNet, if possible, and from LM otherwise + adjustments to line up year
*** 		- New data + percentiles + consistent sample + adjustments to bin year
*** 		- New data + filled in by growing survey means in accordance with national accounts and using distributional data from any year
*** 		- New data + filled in + deciles + non-anonymous
***		3. Use the data already calculated to produce charts for the publication
*************************************************************************************
*************************************************************************************
*************************************************************************************


*** Set your working directory here
global directory = "xxxxxxxxxxxxxxxxxxxx"
cd "$directory"

*** This will re-set your PLUS folder so that you can use our custom graph scheme. You may wish to note down your current PLUS folder so that you can change it back when you are done with this program (type "sysdir list" in the command window)
*** The path uses forward slashes only: Stata accepts them on every platform, whereas a backslash is a path separator on Windows only
sysdir set PLUS "$directory/Ado/Plus"

*** Set graph scheme
set scheme graphscheme
graph set window fontface "Calibri"

*** Install user-written SSC programs 
*** Only install when the command cannot be found, so that the do-file can be re-run without a network connection and without failing on an already-installed copy
capture which wbopendata
if _rc != 0 {
	ssc install wbopendata
}

*************************************************************************************
*************************************************************************************
*************************************************************************************
*** 	1. Define programs to 
***			- mark a consistent sample,
***			- produce an anonymous growth incidence curve (elephant chart), 
***			- produce a table showing the country composition of the elephant chart, and
***			- produce a quasi-non-anonymous growth incidence curve.
*************************************************************************************
*************************************************************************************
*************************************************************************************

*****************************************************************
*****************************************************************
*** Define a program to mark a consistent sample (using the same countries in the start year and the end year)
***		`1' = start year
***		`2' = last year
*** 	`3' = quantile income variable
*****************************************************************
*****************************************************************
capture program drop CONSISTENT_SAMPLE
program define CONSISTENT_SAMPLE

	*** Positional arguments: start year, end year, quantile income variable
	*** Creates the 0/1 variable both_periods in the data in memory
	args first_year last_year quant_inc
	
	*** Per country: earliest bin_year at or after the start year with non-missing income
	*** (filled in only on the observations that satisfy the condition)
	bysort country: egen min_bin_year = min(bin_year) if !missing(`quant_inc') & bin_year >= `first_year'
	
	*** Per country: latest bin_year at or before the end year with non-missing income
	by country: egen max_bin_year = max(bin_year) if !missing(`quant_inc') & bin_year <= `last_year'
	
	*** Flag observations whose country has data exactly at both endpoints
	gen both_periods = (max_bin_year == `last_year' & min_bin_year == `first_year')
	
	*** The helper variables are not needed outside this program
	drop max_bin_year min_bin_year
	
end

*****************************************************************
*****************************************************************
*** Define a program to make an elephant chart
***		`1' = variable with decile mean incomes
***		`2' = first year
***		`3' = last year
***		`4' = sample to use
***		`5' = dummy = 1 if ventiles should be used; dummy = 0 if percentiles should be used
***		`6' = name of graph to save
***		`7' = name of graph for labels
*****************************************************************
*****************************************************************

capture program drop MAKE_ELEPHANT
program define MAKE_ELEPHANT 
	
	*** Builds an anonymous growth incidence curve (elephant chart) from the data in memory.
	*** Positional arguments:
	***		quant_inc       = variable holding quantile mean incomes
	***		first_year      = first bin_year of the comparison
	***		last_year       = last bin_year of the comparison
	***		sample          = expression selecting the observations to use
	***		ventiles        = 1 to group global percentiles into ventiles (with 96-99 grouped as 99 and 100 kept apart); any other value keeps percentiles
	***		graph_name      = short name, used as Excel sheet name, graph name and title, and stored in the output data
	***		graph_name_long = long label, stored in the output data
	*** Side effects: writes to "Tables/Elephant Graphs.xlsx", temporarily writes a PNG under "Graphs", and appends the curve to "Output Data/Elephant Charts.dta".
	*** The data in memory are preserved on entry and restored on exit.
	args quant_inc first_year last_year sample ventiles graph_name graph_name_long
	
	preserve
	
	keep if `sample' & inlist(bin_year, `first_year', `last_year')
	
	**************************************
	*** Assign global p's (percentiles of population, ordered by income)
	**************************************
	gen global_p = .
	foreach year in `first_year' `last_year' {
		*** Population-weighted percentiles are computed separately within each year
		xtile global_p`year' = `quant_inc' [aweight=pop] if bin_year == `year', n(100)
		replace global_p = global_p`year' if bin_year == `year'
		drop global_p`year'
	}
	*** Make ventiles plus top 1%, per L&M
	if `ventiles'==1 {
		rename global_p global_p_orig
		*** Round each percentile up to the next multiple of 5 ...
		gen global_p = ceil(global_p_orig/5)*5
		*** ... except that percentiles 96-99 form their own group, leaving 100 on its own
		replace global_p = 99 if global_p_orig >= 96 & global_p_orig <= 99
	}
	**************************************
	*** Get mean income for each global percentile in each year, and export to excel
	**************************************
	sort global_p bin_year countryname p
	
	*** These labels become the column headers of the Excel export below
	label variable `quant_inc' "Quantile Average Income"
	label variable pop "Quantile Population"
	label variable global_p "Global Quantile"
	label variable p "Within-Country Quantile"
	
	*** First year goes in columns starting at A, last year in columns starting at I, on the same sheet
	export excel global_p bin_year countryname urbrur p `quant_inc' pop if bin_year ==`first_year' & global_p < . using "Tables/Elephant Graphs.xlsx", sheet(`graph_name') firstrow(varlabels) sheetmodify cell(A1)
	
	export excel global_p bin_year countryname urbrur p `quant_inc' pop if bin_year ==`last_year' & global_p < . using "Tables/Elephant Graphs.xlsx", sheet(`graph_name') firstrow(varlabels) sheetmodify cell(I1)

	collapse (mean) `quant_inc' [pweight=pop], by(bin_year global_p)
	
	**************************************
	*** Calculate growth in incomes for each global percentile
	**************************************

	*** Reshape to wide
	reshape wide `quant_inc', i(global_p) j(bin_year)

	*** Get growth rates from the first year to the last year
	gen growth`first_year'_`last_year' = (`quant_inc'`last_year'-`quant_inc'`first_year')/`quant_inc'`first_year'

	*** Plot growth rates by percentile
	twoway (connected growth`first_year'_`last_year' global_p), ///
		title("`graph_name'") name(`graph_name', replace) legend(off)
	*** The PNG is only needed long enough to embed it in the workbook, so it is erased afterwards
	graph export "Graphs/Elephant Chart - `graph_name'.png", replace
	putexcel set "Tables/Elephant Graphs.xlsx", sheet(`graph_name') modify
	putexcel Q1 = picture("Graphs/Elephant Chart - `graph_name'.png")
	erase "Graphs/Elephant Chart - `graph_name'.png"
	
	*** Show numbers & export to a document
	keep global_p growth`first_year'_`last_year'
	rename growth`first_year'_`last_year' growth
	gen graph_name = "`graph_name'"
	gen graph_name_long = "`graph_name_long'"
	drop if global_p == .
	*** Add this curve to the curves already stored, then save the combined file
	append using "Output Data/Elephant Charts.dta"
	save "Output Data/Elephant Charts.dta", replace

	restore
end

*****************************************************************
*****************************************************************
*** Define a program to make a table summarizing the elephant chart
***		`1' = variable with decile mean incomes
***		`2' = first year
***		`3' = last year
***		`4' = sample to use
***		`5' = name of graph to save
***		`6' = name of graph for labels
*****************************************************************
*****************************************************************

capture program drop MAKE_ELEPHANT_TABLE
program define MAKE_ELEPHANT_TABLE
	
	*** Builds a table showing, for each global ventile and year, the percent of its population in each country category.
	*** Positional arguments:
	***		quant_inc       = variable holding quantile mean incomes
	***		first_year      = first bin_year of the comparison
	***		last_year       = last bin_year of the comparison
	***		sample          = expression selecting the observations to use
	***		graph_name      = short name, used as the Excel sheet name
	***		graph_name_long = accepted for symmetry with the other programs; not used here
	*** Side effects: writes the table to "Tables/Elephant Graphs.xlsx" starting at cell R40.
	*** The data in memory are preserved on entry and restored on exit.
	args quant_inc first_year last_year sample graph_name graph_name_long
	
	preserve
	
	quietly keep if `sample' & inlist(bin_year, `first_year', `last_year')
	
	**************************************
	*** Assign global p's (percentiles of population, ordered by income)
	**************************************
	quietly gen global_p = .
	foreach year in `first_year' `last_year' {
		quietly xtile global_p`year' = `quant_inc' [aweight=pop] if bin_year == `year', n(100)
		quietly replace global_p = global_p`year' if bin_year == `year'
		quietly drop global_p`year'
	}
	*** Make ventiles plus top 1%, per L&M
	rename global_p global_p_orig
	quietly gen global_p = ceil(global_p_orig/5)*5
	quietly replace global_p = 99 if global_p_orig >= 96 & global_p_orig <= 99

	**************************************
	*** Calculate composition of each global ventile, in each period
	**************************************
	
	*** Merge in regions
	quietly merge m:1 countryname using "Input Data/Regions.dta", keep(1 3) nogen 

	*** Mark categories of interest
	quietly gen category = countryname if inlist(countryname, "India", "China", "United States", "United Kingdom", "Indonesia", "Nigeria", "Brazil", "Japan", "Mexico")
	quietly replace category = "Egypt" if countryname == "Egypt, Arab Rep."
	quietly replace category = "Russia" if countryname == "Russian Federation"
	quietly replace category = "Other Former Soviet States" if inlist(countryname, "Armenia", "Azerbaijan", "Belarus", "Estonia", "Georgia", "Kazakhstan", "Kyrgyz Republic", "Latvia", "Lithuania") | ///
		inlist(countryname, "Moldova", "Tajikistan", "Turkmenistan", "Ukraine", "Uzbekistan")
	quietly replace category = "Other Europe" if (strpos(region, "Europe") | countryname == "Kosovo") & category == ""
	quietly replace category = "Other East Asia & Pacific" if countryname == "Taiwan"
	quietly replace category = "Other " + region if category == ""
	
	*** Calculate percent of each global percentile in each category
	collapse (sum) pop if global_p < ., by(global_p category bin_year)
	quietly bys global_p bin_year: egen global_p_pop = total(pop)
	quietly gen category_pct = pop/global_p_pop*100
	
	**** Format table for export
	*** The variable order only drives the row order of the exported table; it is not exported itself.
	*** "Other Europe" is listed because that is the category name assigned above (any region containing "Europe" is relabelled before the generic "Other " + region rule runs).
	*** Both spellings of the Latin America category are listed so the row is ordered whichever spelling the regions file uses; NOTE(review): confirm the spelling in "Input Data/Regions.dta".
	gen order = .
	local i = 1
	foreach cat in "United States" "United Kingdom" "Japan" "China" "India" "Russia" "Indonesia" "Mexico" "Brazil" "Nigeria" "Egypt" "Other Former Soviet States" "Other North America" "Other Europe" "Other Europe & Central Asia" ///
		"Other Latin America & Caribbean" "Other Latin America & Carribean" "Other East Asia & Pacific" "Other South Asia" "Other Sub-Saharan Africa" "Other Middle East & North Africa" {
		replace order = `i' if category == "`cat'"
		local i = `i' + 1
	}
	keep category order bin_year global_p category_pct
	quietly reshape wide category_pct, i(category order bin_year) j(global_p)
	foreach var of varlist category_pct* {
		*** Label each column with its quantile number only: strip the whole "category_pct" stub (12 characters), so that the header reads "5" and not "t5"
		local lbl = subinstr("`var'", "category_pct", "", 1)
		label variable `var' "`lbl'"
		*** A category that is absent from a quantile has a share of zero
		quietly replace `var' = 0 if `var' == .
	}
	sort bin_year order
	export excel bin_year category category_pct* using "Tables/Elephant Graphs.xlsx", sheet(`graph_name') firstrow(varlabels) sheetmodify cell(R40)

	restore
end

*****************************************************************
*****************************************************************
*** Define a program to make a non-anonymous elephant chart
***		`1' = variable with decile mean incomes
***		`2' = first year
***		`3' = last year
***		`4' = sample to use
***		`5' = name of graph to save
***		`6' = name of graph for labels
*****************************************************************
*****************************************************************

capture program drop MAKE_ELEPHANT_NONANON
program define MAKE_ELEPHANT_NONANON

	*** Builds a quasi-non-anonymous growth incidence curve: income growth of each national quantile, averaged within the global quantile it occupied in the first year.
	*** Positional arguments:
	***		quant_inc       = variable holding quantile mean incomes
	***		first_year      = first bin_year of the comparison
	***		last_year       = last bin_year of the comparison
	***		sample          = expression selecting the observations to use
	***		graph_name      = short name, used as Excel sheet name, graph name and title, and stored in the output data
	***		graph_name_long = long label, stored in the output data
	*** Side effects: writes to "Tables/Elephant Graphs.xlsx", temporarily writes a PNG under "Graphs", and appends the curve to "Output Data/Elephant Charts.dta".
	*** The data in memory are preserved on entry and restored on exit.
	args quant_inc first_year last_year sample graph_name graph_name_long

	preserve
	
	keep if `sample' & inlist(bin_year, `first_year', `last_year')
	
	**************************************
	*** Calculate growth in income for each national percentile
	**************************************
	keep `quant_inc' pop surveyyear countryname  bin_year urbrur p 
	reshape wide `quant_inc' pop surveyyear, i(countryname urbrur p) j(bin_year)
	*** Growth between the two bin years, rescaled by the ratio of the nominal span to the actual span between survey years
	*** (identical survey years give a zero denominator and hence a missing growth rate)
	gen country_growth = ((`quant_inc'`last_year' - `quant_inc'`first_year')/`quant_inc'`first_year') * ((`last_year'-`first_year')/(surveyyear`last_year' - surveyyear`first_year'))

	**************************************
	*** Assign global p's (percentiles of population, ordered by income)
	**************************************

	*** Mark global p's in the starting year, using only quantiles that also have income in the ending year
	xtile global_p`first_year' = `quant_inc'`first_year'  if `quant_inc'`last_year' < . [aweight=pop`first_year'], n(100)

	*** Make ventiles plus top 1%, per L&M
	rename global_p`first_year' global_p`first_year'_orig
	gen global_p`first_year' = ceil(global_p`first_year'_orig/5)*5
	replace global_p`first_year' = 99 if global_p`first_year'_orig >= 96 & global_p`first_year'_orig <= 99

	*** Mark global p's in the ending year, weighted by the ending-year population
	*** (these ending-year quantiles are not used by the export, collapse or plot below)
	xtile global_p`last_year' = `quant_inc'`last_year' [aweight=pop`last_year'], n(100)

	*** Make ventiles plus top 1%, per L&M
	rename global_p`last_year' global_p`last_year'_orig
	gen global_p`last_year' = ceil(global_p`last_year'_orig/5)*5
	replace global_p`last_year' = 99 if global_p`last_year'_orig >= 96 & global_p`last_year'_orig <= 99

	**************************************
	*** Show country deciles in each global quantile
	**************************************
	sort global_p`first_year' countryname p
	export excel global_p`first_year' countryname urbrur p `quant_inc'`first_year' `quant_inc'`last_year' country_growth pop`first_year' if global_p`first_year' < . & country_growth < . using "Tables/Elephant Graphs.xlsx", sheet(`graph_name') firstrow(variables) sheetmodify cell(A1)

	**************************************
	*** Get mean income growth for each global quantile of the starting year
	************************************** 
	collapse (mean) country_growth [pweight=pop`first_year'], by(global_p`first_year')

	**************************************
	*** Plot elephant chart
	**************************************
	sort global_p`first_year'
	twoway connected country_growth global_p`first_year', ///
		ylab(0(.2)1.2)  name(`graph_name', replace) title("`graph_name'")
	*** The PNG is only needed long enough to embed it in the workbook, so it is erased afterwards
	graph export "Graphs/Elephant Chart - `graph_name'.png", replace
	putexcel set "Tables/Elephant Graphs.xlsx", sheet(`graph_name') modify
	putexcel Q1 = picture("Graphs/Elephant Chart - `graph_name'.png")
	erase "Graphs/Elephant Chart - `graph_name'.png"

	*** Export to a dta file
	keep global_p`first_year' country_growth
	rename (global_p`first_year' country_growth) (global_p growth)
	gen graph_name = "`graph_name'"
	gen graph_name_long = "`graph_name_long'"
	drop if global_p == .
	*** Add this curve to the curves already stored, then save the combined file
	append using "Output Data/Elephant Charts.dta"
	save "Output Data/Elephant Charts.dta", replace
	
	restore

end

*************************************************************************************
*************************************************************************************
*************************************************************************************
*** 	2. Produce growth incidence curves using various datasets and time periods, as follows
***			- Original Lakner-Milanovic (LM) data
***			- Original LM data + consistent sample
***			- Original LM countries + consistent sample + 2011 PPP quantile incomes from PovcalNet, if possible, and from LM, if not
*** 		- Original LM countries + consistent sample + 2011 PPP quantile incomes from PovcalNet, if possible, and from LM otherwise + adjustments to line up year
*** 		- New data + percentiles + consistent sample + adjustments to bin year
*** 		- New data + filled in by growing survey means in accordance with national accounts and using distributional data from any year
*** 		- New data + filled in + deciles + non-anonymous
*************************************************************************************
*************************************************************************************
*************************************************************************************

*****************************************************************
*****************************************************************
*** Create a blank file to store elephant plots in
*****************************************************************
*****************************************************************
*** Empty dataset with the four variables that every chart program appends to
clear
generate graph_name = ""
generate graph_name_long = ""
generate global_p = .
generate growth = .

*** Overwrite any file left over from a previous run
save "Output Data/Elephant Charts.dta", replace

*****************************************************************
*****************************************************************
*** Original LM Data
*****************************************************************
*****************************************************************

**************************************
*** Read in the data
**************************************
use "Input Data/LM Quantile Data - Original.dta", clear

**************************************
*** Make the chart
**************************************
*** Sample: every observation with a bin year; ventiles are used for the chart
MAKE_ELEPHANT quant_inc_lm 1988 2008 ///
	"bin_year < ." 1 original "Original"
MAKE_ELEPHANT_TABLE quant_inc_lm 1988 2008 ///
	"bin_year < ." original "Original"

*****************************************************************
*****************************************************************
*** Original LM data, consistent sample
*****************************************************************
*****************************************************************

**************************************
*** Read in the data
**************************************
use "Input Data/LM Quantile Data - Original.dta", clear

**************************************
*** Mark consistent sample
**************************************
CONSISTENT_SAMPLE 1988 2008 quant_inc_lm
*** The tabulation is run quietly only to obtain the number of countries in the returned results
quietly tab countryname if both_periods == 1
return list // 60 countries

**************************************
*** Make the chart
**************************************
MAKE_ELEPHANT quant_inc_lm 1988 2008 "bin_year < . & both_periods == 1" 1 orig_cons_samp "Original with Consistent Sample"
MAKE_ELEPHANT_TABLE quant_inc_lm 1988 2008 "bin_year < . & both_periods == 1" orig_cons_samp "Original with Consistent Sample"

**************************************
*** Check % of population captured
**************************************
*** Reduce the data to one row per country in the consistent sample
keep if both_periods == 1
keep countryname 
duplicates drop
*** Save under the exact temporary name (no added extension) so that Stata erases the file automatically; quotes protect paths containing spaces
tempfile in_sample
save "`in_sample'", replace

*** Read in population data
wbopendata, clear indicator(SP.POP.TOTL) year(2008) long nometadata
drop if region == "Aggregates" | (region == "" & countryname != "Kosovo")
replace region = subinstr(region, " (all income levels)", "", .)
rename sp_pop_totl pop

*** Merge in list of countries in the sample
*** assert(1 3) stops the run if a sample country is not found in the population data
merge 1:1 countryname using "`in_sample'", assert(1 3)
gen in_sample = (_merge == 3)
tab in_sample, m

*** Calculate % of global population in the sample
egen world_pop = total(pop)
collapse (sum) pop (first) world_pop, by(in_sample)
gen pop_pct = pop/world_pop*100
list 

*****************************************************************
*****************************************************************
*** Original LM countries + consistent sample + 2011 PPP quantile incomes from PovcalNet, if possible, and from LM, if not
*****************************************************************
*****************************************************************

**************************************
*** Read in the data
**************************************

*** Get a list of surveys  in the LM sample
use "Input Data/LM Quantile Data - Original.dta", clear
keep if inlist(bin_year, 1988, 2008) & quant_inc_lm < .
keep countryname urbrur surveytype bin_year pop totpop
duplicates drop

*** Merge in PCN 2011 PPP decile data 
merge 1:m countryname urbrur surveytype bin_year using "Input Data/PCN Quantile Data - Deciles.dta", keep(1 3)

*** Merge in LM data converted from 2005 PPP to 2011 PPP for remaining cases
*** The unmatched surveys are processed on a copy of the data and parked in a temporary file
preserve
keep if _merge == 1
keep countryname urbrur surveytype bin_year pop totpop
merge 1:m countryname urbrur surveytype bin_year using "Input Data/LM Quantile Data - 2011 PPP.dta", keep(1 3) nogen
*** Save under the exact temporary name (no added extension) so that Stata erases the file automatically; quotes protect paths containing spaces
tempfile from_lm
save "`from_lm'", replace
restore
*** Replace the unmatched rows with their LM-based counterparts
drop if _merge == 1
drop _merge
append using "`from_lm'"

*** Make a single variable containing quantile incomes
gen quant_inc = quant_inc_pcn_dec
replace quant_inc = quant_inc_lm_2011ppp if quant_inc == .

**************************************
*** Mark consistent sample
**************************************
CONSISTENT_SAMPLE 1988 2008 quant_inc

**************************************
*** Make the chart
**************************************
MAKE_ELEPHANT quant_inc 1988 2008 "bin_year < . & both_periods == 1" 1 orig_cons_samp_2011ppp "Original with Consistent Sample, 2011 PPP"
MAKE_ELEPHANT_TABLE quant_inc 1988 2008 "bin_year < . & both_periods == 1" orig_cons_samp_2011ppp "Original with Consistent Sample, 2011 PPP"

*****************************************************************
*****************************************************************
*** Original LM countries + consistent sample + 2011 PPP quantile incomes from PovcalNet, if possible, and from LM otherwise + adjustments to line up year
*****************************************************************
*****************************************************************

**************************************
*** Read in the data
**************************************

*** Get a list of surveys  in the LM sample
use "Input Data/LM Quantile Data - Original.dta", clear
keep if inlist(bin_year, 1988, 2008) & quant_inc_lm < .
keep countryname urbrur surveytype bin_year pop totpop
duplicates drop

*** Merge in PCN 2011 PPP decile data, adjusted to line-up year
merge 1:m countryname urbrur surveytype bin_year using "Input Data/PCN Quantile Data - Deciles, Adjusted.dta", keep(1 3)

*** Merge in LM data converted from 2005 PPP to 2011 PPP for remaining cases, adjusted to line-up year
*** The unmatched surveys are processed on a copy of the data and parked in a temporary file
preserve
keep if _merge == 1
keep countryname urbrur surveytype bin_year pop totpop
merge 1:m countryname urbrur surveytype bin_year using "Input Data/LM Quantile Data - 2011 PPP, Adjusted.dta", keep(1 3) nogen
tab p, m
*** Save under the exact temporary name (no added extension) so that Stata erases the file automatically; quotes protect paths containing spaces
tempfile from_lm
save "`from_lm'", replace
restore
*** Replace the unmatched rows with their LM-based counterparts
drop if _merge == 1
drop _merge
append using "`from_lm'"

*** Make a single variable containing quantile incomes
gen quant_inc = quant_inc_pcn_dec_adj
replace quant_inc = quant_inc_lm_2011ppp_adj if quant_inc == .

**************************************
*** Mark consistent sample
**************************************
CONSISTENT_SAMPLE 1988 2008 quant_inc

**************************************
*** Make the chart
**************************************
MAKE_ELEPHANT quant_inc 1988 2008 "bin_year < . & both_periods == 1" 1 orig_cons_samp_2011ppp_adj "Original with Consistent Sample, 2011 PPP, Adjusted to Line-Up Year"
MAKE_ELEPHANT_TABLE quant_inc 1988 2008 "bin_year < . & both_periods == 1" orig_cons_samp_2011ppp_adj "Original with Consistent Sample, 2011 PPP, Adjusted to Line-Up Year"

*****************************************************************
*****************************************************************
*** New data + percentiles + consistent sample + adjustments to bin year
*****************************************************************
*****************************************************************

**************************************
*** Read in the data
**************************************
use "Input Data/PCN Quantile Data - Adjusted.dta", clear

*** Add in surveys from the original LM data
*** keep(2) retains only country / bin-year combinations that are not already in memory
preserve
keep countryname bin_year 
keep if bin_year < .
duplicates drop
merge 1:m countryname bin_year using "Input Data/LM Quantile Data - 2011 PPP, Adjusted.dta", keep(2) nogen norep
tab countryname bin
*** Save under the exact temporary name (no added extension) so that Stata erases the file automatically; quotes protect paths containing spaces
tempfile from_lm
save "`from_lm'", replace
restore
append using "`from_lm'"

*** Add in surveys from WIID
*** Same approach: only combinations still missing after the LM step are taken
preserve
keep countryname bin_year 
keep if bin_year < .
duplicates drop
merge 1:m countryname bin_year using "Input Data/WIID Quantile Data - Adjusted.dta", keep(2) nogen norep
tab countryname bin if surveytype == "I"
tab countryname bin if surveytype == "C"
tempfile from_wiid
save "`from_wiid'", replace
restore
append using "`from_wiid'"

*** Make a single variable containing quantile incomes
*** Priority: PCN first, then LM, then WIID
gen quant_inc = quant_inc_pcn_adj
replace quant_inc = quant_inc_lm_2011ppp_adj if quant_inc == .
replace quant_inc = quant_inc_wiid_adj if quant_inc == .

**************************************
*** Mark consistent sample for 1988-2008
**************************************
CONSISTENT_SAMPLE 1988 2008 quant_inc
quietly tab countryname if both_periods == 1
return list

**************************************
*** Make the chart for 1988-2008
**************************************
MAKE_ELEPHANT quant_inc 1988 2008 "bin_year < . & both_periods == 1" 0 new_cons_samp_adj_pctile "Updated Data with Consistent Sample, 2011 PPP, Adjusted to Line-Up Year"
MAKE_ELEPHANT_TABLE quant_inc 1988 2008 "bin_year < . & both_periods == 1" new_cons_samp_adj_pctile "Updated Data with Consistent Sample, 2011 PPP, Adjusted to Line-Up Year"

**************************************
*** Mark consistent sample for 1993-2013
**************************************
*** The flag from the 1988-2008 run must be dropped before it can be created again
drop both_periods 
CONSISTENT_SAMPLE 1993 2013 quant_inc
quietly tab countryname if both_periods == 1
return list 

**************************************
*** Make chart for 1993-2013
**************************************
MAKE_ELEPHANT quant_inc 1993 2013 "bin_year < . & both_periods == 1" 0 new_cons_samp_adj93_pctile "Updated Data with Consistent Sample, 2011 PPP, Adjusted to Line-Up Year"
MAKE_ELEPHANT_TABLE quant_inc 1993 2013 "bin_year < . & both_periods == 1" new_cons_samp_adj93_pctile "Updated Data with Consistent Sample, 2011 PPP, Adjusted to Line-Up Year"

**************************************
*** Check % of population captured
**************************************
*** NOTE(review): both_periods marks the 1993-2013 sample at this point, while population is read for 2008 below - confirm that this pairing is intended
keep if both_periods == 1
keep countryname 
duplicates drop
tempfile in_sample
save "`in_sample'", replace

*** Read in population data
wbopendata, clear indicator(SP.POP.TOTL) year(2008) long nometadata
drop if region == "Aggregates" | (region == "" & countryname != "Kosovo")
replace region = subinstr(region, " (all income levels)", "", .)
rename sp_pop_totl pop

*** Merge in list of countries in the sample
*** assert(1 3) stops the run if a sample country is not found in the population data
merge 1:1 countryname using "`in_sample'", assert(1 3)
gen in_sample = (_merge == 3)
tab in_sample, m

*** Calculate % of global population in the sample
egen world_pop = total(pop)
collapse (sum) pop (first) world_pop, by(in_sample)
gen pop_pct = pop/world_pop*100
list 

*****************************************************************
*****************************************************************
*** New data + filled in by growing survey means in accordance with national accounts and using distributional data from any year
*****************************************************************
*****************************************************************

**************************************
*** Read in the filled in data
**************************************
use "Input Data/Combined Quantile Data - Filled In.dta", clear

**************************************
*** Mark consistent sample for 1993-2013
**************************************
CONSISTENT_SAMPLE 1993 2013 quant_inc_fillin

**************************************
*** Make chart for 1993-2013
**************************************
MAKE_ELEPHANT quant_inc_fillin 1993 2013 "quant_inc_fillin < . & both_periods == 1" 0 new_fillin "Updated Data with Consistent Sample, 2011 PPP, Filled In If Any Survey"
MAKE_ELEPHANT_TABLE quant_inc_fillin 1993 2013 "quant_inc_fillin < . & both_periods == 1" new_fillin "Updated Data with Consistent Sample, 2011 PPP, Filled In If Any Survey"

**************************************
*** Check survey types
**************************************
*** isid stops the run if any country in the sample appears with more than one survey type
preserve
keep if both_periods == 1
keep countryname surveytype
duplicates drop
isid countryname
tab surveytype
restore

**************************************
*** Check % of population captured
**************************************
*** Reduce the data to one row per country in the consistent sample
keep if both_periods == 1
keep countryname 
duplicates drop
*** Save under the exact temporary name (no added extension) so that Stata erases the file automatically; quotes protect paths containing spaces
tempfile in_sample
save "`in_sample'", replace

*** Read in population data
wbopendata, clear indicator(SP.POP.TOTL) year(2013) long nometadata
drop if region == "Aggregates" | (region == "" & countryname != "Kosovo")
replace region = subinstr(region, " (all income levels)", "", .)
rename sp_pop_totl pop

*** Merge in list of countries in the sample
*** assert(1 3) stops the run if a sample country is not found in the population data
merge 1:1 countryname using "`in_sample'", assert(1 3)
gen in_sample = (_merge == 3)
tab in_sample, m

*** Calculate % of global population in the sample
egen world_pop = total(pop)
collapse (sum) pop (first) world_pop, by(in_sample)
gen pop_pct = pop/world_pop*100
list 

*****************************************************************
*****************************************************************
*** New data + filled in + deciles + non-anonymous
*****************************************************************
*****************************************************************

**************************************
*** Read in the data
**************************************
use "Input Data/Combined Quantile Data - Filled In, Deciles.dta", clear

**************************************
*** Make non-anonymous chart
**************************************
*** The sample condition names the income variable in full instead of relying on variable-name abbreviation, which breaks if abbreviation is switched off or if another variable starting with quant_inc is present
*** NOTE(review): the period used here is 1993-2003, whereas the other updated charts use 1993-2013 - confirm that 2003 is intended
MAKE_ELEPHANT_NONANON quant_inc_fillin_dec 1993 2003 "quant_inc_fillin_dec < ." nonanon93 "Updated Data with Consistent Sample, 2011 PPP, Quasi-Non-Anonymous Method"

*************************************************************************************
*************************************************************************************
*************************************************************************************
***		3. Use the data already calculated to produce charts for the publication
*************************************************************************************
*************************************************************************************
*************************************************************************************

**************************************
*** Read in the data
**************************************
*** Load the chart data stored in "Output Data/Elephant Charts.dta"; this replaces whatever dataset is currently in memory
use "Output Data/Elephant Charts.dta", clear

*** Add in WID Global GIC data (which is available online as a global distribution, therefore we did not need to calculate the global distribution ourselves)
*** The WID rows are stacked underneath the rows loaded above
append using "Input Data/WID Global GIC Data.dta"

**************************************
*** Define a program to create legend labels
**************************************
capture program drop DEFINE_LABELS
program DEFINE_LABELS
	*** Purpose: build the global macro $legend used by the figure commands.
	*** Argument: one quoted, space-separated list of graph_name values, in the
	***           order the series are plotted (up to 8 are used by $legend).
	*** Result:   globals label1-label8 hold the graph_name_long of the first
	***           observation of each listed series; $legend holds the complete
	***           legend() option. The data in memory are left unchanged.
	
	args graph_names_list
	
	*** Reset every label slot so that labels left over from an earlier call with
	*** a longer list cannot leak into this legend
	forvalues slot = 1/8 {
		global label`slot' ""
	}
	
	*** Observation numbers, used to find the first row of each series without
	*** having to preserve/restore the whole dataset once per series
	tempvar obs_no
	quietly generate long `obs_no' = _n
	
	local i = 1
	foreach graph in `graph_names_list' {
		
		quietly summarize `obs_no' if graph_name == "`graph'", meanonly
		if r(N) > 0 {
			local first = r(min)
			global label`i' = graph_name_long[`first']
		}
		else {
			*** Unknown series: the slot stays blank, as it did before
			display as text "note: DEFINE_LABELS found no observations for series `graph'"
		}
		
		local i = `i'+1
	}
	
	global legend = `"legend(on size(vsmall) symxsize(*.5) label(1 "$label1") label(2 "$label2") label(3 "$label3") label(4 "$label4") label(5 "$label5") label(6 "$label6") label(7 "$label7") label(8 "$label8")) "'
	
end

*** Display growth with one decimal place (and comma separators)
format growth %10.1fc

*** Options shared by every figure below: y-axis labels from 0 to 1.0 in steps of 0.2 without ticks, x-axis labels from 0 to 100 in steps of 10,
*** axis titles, a fixed aspect ratio, and no margin around the plot region
global graph_ops = `"ylab(0(0.2)1.0, noticks) xlab(0(10)100) xscale(range(0, 100)) yscale(noline range(0, 1.1)) ytitle("Income Growth") xtitle("Global Quantile") aspectratio(.65) plotregion(margin(zero))"'

	
**************************************
*** Figure 1
**************************************
*** Legend labels for the four series used across Figures 1-4
DEFINE_LABELS "original orig_cons_samp orig_cons_samp_2011ppp new_cons_samp_adj_pctile"

*** Single highlighted series: "original"
#delimit ;
twoway
	(connected growth global_p if graph_name == "original",
		msize(tiny) lcolor("206 4 4") mcolor("206 4 4") lwidth(medthick) msymbol(circle)),
	$graph_ops $legend xsize(8) ysize(6);
#delimit cr

*** Save the figure in both formats
foreach ext in pdf eps {
	graph export "Graphs/Figure 1.`ext'", replace
}

**************************************
*** Figure 2
**************************************
*** "original" drawn faded as a plain line; "orig_cons_samp" highlighted with markers
#delimit ;
twoway
	(line growth global_p if graph_name == "original",
		msize(tiny) lcolor("255 183 185") lwidth(medthick))
	(connected growth global_p if graph_name == "orig_cons_samp",
		msize(tiny) lcolor("2 123 218") mcolor("2 123 218") lwidth(medthick) msymbol(circle)),
	$graph_ops  $legend xsize(8) ysize(6.4);
#delimit cr

*** Save the figure in both formats
foreach ext in pdf eps {
	graph export "Graphs/Figure 2.`ext'", replace
}

**************************************
*** Figure 3
**************************************
*** Two faded reference lines plus "orig_cons_samp_2011ppp" highlighted with markers
#delimit ;
twoway
	(line growth global_p if graph_name == "original",
		msize(tiny) lcolor("255 183 185") lwidth(medthick))
	(line growth global_p if graph_name == "orig_cons_samp",
		msize(tiny) lcolor("146 208 254") lwidth(medthick))
	(connected growth global_p if graph_name == "orig_cons_samp_2011ppp",
		msize(tiny) lcolor("45 63 160") mcolor("45 63 160") lwidth(medthick)  msymbol(circle)),
	$graph_ops  $legend xsize(8) ysize(6.8);
#delimit cr

*** Save the figure in both formats
foreach ext in pdf eps {
	graph export "Graphs/Figure 3.`ext'", replace
}

**************************************
*** Figure 4
**************************************
*** Three faded reference lines plus "new_cons_samp_adj_pctile" highlighted with markers; kept in memory as graph "fig4"
#delimit ;
twoway
	(line growth global_p if graph_name == "original",
		msize(tiny) lcolor("255 183 185") lwidth(medthick))
	(line growth global_p if graph_name == "orig_cons_samp",
		msize(tiny) lcolor("146 208 254") lwidth(medthick))
	(line growth global_p if graph_name == "orig_cons_samp_2011ppp",
		msize(tiny) lcolor("169 179 231") lwidth(medthick))
	(connected growth global_p if graph_name == "new_cons_samp_adj_pctile",
		msize(tiny) lcolor("253 157 36") mcolor("253 157 36") lwidth(medthick) msymbol(circle)),
	$graph_ops  $legend xsize(8) ysize(7.2) name(fig4, replace);
#delimit cr

*** Save the figure in both formats
foreach ext in pdf eps {
	graph export "Graphs/Figure 4.`ext'", replace
}

**************************************
*** Figure 5
**************************************
*** Left panel (graph "left"): the four 1988-2008 series, all drawn as faded lines
DEFINE_LABELS "original orig_cons_samp orig_cons_samp_2011ppp new_cons_samp_adj_pctile"
#delimit ;
twoway
	(line growth global_p if graph_name == "original",
		msize(tiny) lcolor("255 183 185") lwidth(medthick))
	(line growth global_p if graph_name == "orig_cons_samp",
		msize(tiny) lcolor("146 208 254") lwidth(medthick))
	(line growth global_p if graph_name == "orig_cons_samp_2011ppp",
		msize(tiny) lcolor("169 179 231") lwidth(medthick))
	(line growth global_p if graph_name == "new_cons_samp_adj_pctile",
		msize(tiny) lcolor("254 205 146") lwidth(medthick)),
	$graph_ops $legend legend(rows(4)) xsize(8) ysize(7.6) title("1988-2008") name(left, replace);
#delimit cr

*** Right panel (graph "right"): the 1993-2013 series, highlighted with markers
DEFINE_LABELS "new_cons_samp_adj93_pctile new_fillin"
#delimit ;
twoway
	(connected growth global_p if graph_name == "new_cons_samp_adj93_pctile",
		msize(tiny) lcolor("4 158 140") mcolor("4 158 140") lwidth(medthick) msymbol(circle)),
	$graph_ops $legend legend(rows(10) holes(2/10)) xsize(8) ysize(7.6) title("1993-2013") name(right, replace);
#delimit cr

*** Put the two panels side by side on common axes and save in both formats
graph combine left right, xcommon ycommon
foreach ext in pdf eps {
	graph export "Graphs/Figure 5.`ext'", replace
}

**************************************
*** Figure 6
**************************************
*** New right panel: "new_cons_samp_adj93_pctile" faded, "new_fillin" highlighted with markers (replaces the graph "right" in memory)
#delimit ;
twoway
	(line growth global_p if graph_name == "new_cons_samp_adj93_pctile",
		msize(tiny) lcolor("163 237 228") lwidth(medthick))
	(connected growth global_p if graph_name == "new_fillin",
		msize(tiny) lcolor("2 123 218") mcolor("2 123 218") lwidth(medthick) msymbol(circle)),
	$graph_ops $legend legend(rows(8) holes(3/8)) xsize(8) ysize(6.4) title("1993-2013") name(right, replace);
#delimit cr

*** Combine with the graph "left" that is still in memory from Figure 5, then save in both formats
graph combine left right, xcommon ycommon
foreach ext in pdf eps {
	graph export "Graphs/Figure 6.`ext'", replace
}

**************************************
*** Figure 7
**************************************
*** Legend labels for the two series compared here
DEFINE_LABELS "new_fillin wid_93"

*** "new_fillin" against the appended WID series "wid_93", both with markers
#delimit ;
twoway
	(connected growth global_p if graph_name == "new_fillin",
		msize(tiny) lcolor("2 123 218") mcolor("2 123 218") lwidth(medthick) msymbol(circle))
	(connected growth global_p if graph_name == "wid_93",
		msize(tiny) lcolor("253 157 36") mcolor("253 157 36") lwidth(medthick) msymbol(circle)),
	$graph_ops  $legend xsize(8) ysize(6.4);
#delimit cr

*** Save the figure in both formats
foreach ext in pdf eps {
	graph export "Graphs/Figure 7.`ext'", replace
}

**************************************
*** Figure 8
**************************************
*** Legend labels for the two series compared here
DEFINE_LABELS "new_fillin nonanon93"

*** "new_fillin" against the quasi-non-anonymous series "nonanon93", both with markers
#delimit ;
twoway
	(connected growth global_p if graph_name == "new_fillin",
		msize(tiny) lcolor("2 123 218") mcolor("2 123 218") lwidth(medthick) msymbol(circle))
	(connected growth global_p if graph_name == "nonanon93",
		msize(tiny) lcolor("45 63 160") mcolor("45 63 160") lwidth(medthick) msymbol(circle)),
	$graph_ops  $legend xsize(8) ysize(6.4);
#delimit cr

*** Save the figure in both formats
foreach ext in pdf eps {
	graph export "Graphs/Figure 8.`ext'", replace
}
